source("../src/functions.R")
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
## lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
##
## simplify
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
##
## Attaching package: 'BiocStyle'
## The following objects are masked from 'package:rmarkdown':
##
## html_document, md_document, pdf_document
## The following object is masked from 'package:shiny':
##
## markdown
count <- read.table("../data/count.txt.gz", row.names=1, header=TRUE, sep="\t")
expdesign <- read.table("../data/expdesign.txt.gz", row.names=1, header=TRUE, sep="\t", skip=6, stringsAsFactors = FALSE)
# Merge
# Filtering non single-cells / zero count cells
count %>%
rownames_to_column("gene") %>%
pivot_longer(-gene, names_to="cell", values_to="exp") %>%
mutate(cell = str_replace(cell, "^X", "")) %>%
inner_join(expdesign,
by=c("cell"="Column_name_in_processed_data_file")) %>%
rename_all(tolower) %>%
group_by(cell) %>%
mutate(sum=sum(exp)) %>%
filter(sum != 0 &&
number_of_cells == 1 &&
group_name %in%
c("B cell", "CD8+pDC", "monocyte_or_neutrophil", "NK_cell")) %>%
ungroup %>%
arrange(cell) -> marsdata
# Extract gene expression part
marsdata %>%
select(gene, cell, exp) %>%
pivot_wider(names_from="cell", values_from="exp") -> wide_marsdata
# Gene name
wide_marsdata %>%
select(gene) %>%
data.frame %>%
.[,1] -> genename
# Expression matrix -> SingleCellExperiment
wide_marsdata %>%
select(!gene) %>%
as.matrix %>%
SingleCellExperiment(assays=list(counts=.[])) -> sce_marsdata
# rownames
genename -> rownames(sce_marsdata)
# coldata
marsdata %>%
select(!c(gene, exp)) %>%
distinct %>%
arrange(cell) %>%
DataFrame -> colData(sce_marsdata)
# Analysis workflow of scater
sce_marsdata %>%
logNormCounts %>%
runPCA(ntop=2000, ncomponents=10) %>%
runTSNE(dimred = "PCA") %>%
runUMAP(dimred = "PCA") -> sce_marsdata
pairs(reducedDim(sce_marsdata, "PCA"),
col=factor(colData(sce_marsdata)$group_name),
pch=16, cex=0.5, main="PCA")
plot(reducedDim(sce_marsdata, "TSNE"),
col=factor(colData(sce_marsdata)$group_name),
xlab="tSNE1", ylab="tSNE2",
pch=16, cex=2, main="tSNE")
sce_marsdata %>%
plotReducedDim(dimred="PCA", colour_by="group_name", ncomponents=10)
sce_marsdata %>%
plotReducedDim(dimred="TSNE", colour_by="group_name")
sce_marsdata %>%
plotReducedDim(dimred="UMAP", colour_by="group_name")
reducedDim(sce_marsdata, "TSNE") %>%
cbind(colData(sce_marsdata)$group_name) %>%
data.frame %>%
mutate_at(vars(-X3), as.numeric) %>%
ggplot(aes(x=X1, y=X2, color=X3)) +
geom_point() +
xlab("PC1") +
ylab("PC2") +
theme(legend.title = element_blank())
reducedDim(sce_marsdata, "PCA") %>%
cbind(colData(sce_marsdata)$group_name) %>%
data.frame %>%
mutate_at(vars(-V11), as.numeric) %>%
ggpairs(columns=1:10, aes(color=V11))
reducedDim(sce_marsdata, "PCA") %>%
.[,1:5] %>%
pairsD3(group=colData(sce_marsdata)$group_name,
tooltip=colData(sce_marsdata)$cell)
reducedDim(sce_marsdata) %>%
as_tibble %>%
select(PC1, PC2, PC3) %>%
data.frame %>%
plot_ly(x=~PC1, y=~PC2, z=~PC3,
type = "scatter3d", mode = "markers",
text = colData(sce_marsdata)$cell,
color =~colData(sce_marsdata)$group_name
)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# app <- iSEE(sce_marsdata)
# runApp(app)
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] knitr_1.30 BiocStyle_2.16.1
## [3] rmarkdown_2.5 pairsD3_0.1.0
## [5] shiny_1.5.0 iSEE_2.0.0
## [7] plotly_4.9.2.1 GGally_2.0.0
## [9] scater_1.16.2 SingleCellExperiment_1.10.1
## [11] SummarizedExperiment_1.18.2 DelayedArray_0.14.1
## [13] matrixStats_0.57.0 Biobase_2.48.0
## [15] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [17] IRanges_2.22.2 S4Vectors_0.26.1
## [19] BiocGenerics_0.34.0 forcats_0.5.0
## [21] stringr_1.4.0 dplyr_1.0.2
## [23] purrr_0.3.4 readr_1.4.0
## [25] tidyr_1.1.2 tibble_3.0.4
## [27] ggplot2_3.3.2 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.10
## [3] circlize_0.4.10 plyr_1.8.6
## [5] igraph_1.2.6 lazyeval_0.2.2
## [7] shinydashboard_0.7.1 splines_4.0.2
## [9] crosstalk_1.1.0.1 BiocParallel_1.22.0
## [11] digest_0.6.26 htmltools_0.5.0
## [13] viridis_0.5.1 fansi_0.4.1
## [15] magrittr_1.5 cluster_2.1.0
## [17] ComplexHeatmap_2.4.3 modelr_0.1.8
## [19] colorspace_1.4-1 blob_1.2.1
## [21] rvest_0.3.6 haven_2.3.1
## [23] xfun_0.18 crayon_1.3.4
## [25] RCurl_1.98-1.2 jsonlite_1.7.1
## [27] glue_1.4.2 gtable_0.3.0
## [29] zlibbioc_1.34.0 XVector_0.28.0
## [31] GetoptLong_1.0.4 BiocSingular_1.4.0
## [33] shape_1.4.5 scales_1.1.1
## [35] DBI_1.1.0 miniUI_0.1.1.1
## [37] Rcpp_1.0.5 viridisLite_0.3.0
## [39] xtable_1.8-4 clue_0.3-57
## [41] rsvd_1.0.3 DT_0.16
## [43] htmlwidgets_1.5.2 httr_1.4.2
## [45] FNN_1.1.3 RColorBrewer_1.1-2
## [47] shinyAce_0.4.1 ellipsis_0.3.1
## [49] farver_2.0.3 pkgconfig_2.0.3
## [51] reshape_0.8.8 uwot_0.1.8
## [53] dbplyr_1.4.4 labeling_0.4.2
## [55] tidyselect_1.1.0 rlang_0.4.8
## [57] later_1.1.0.1 munsell_0.5.0
## [59] cellranger_1.1.0 tools_4.0.2
## [61] cli_2.1.0 generics_0.0.2
## [63] rintrojs_0.2.2 broom_0.7.2
## [65] evaluate_0.14 fastmap_1.0.1
## [67] yaml_2.2.1 fs_1.5.0
## [69] nlme_3.1-149 mime_0.9
## [71] xml2_1.3.2 compiler_4.0.2
## [73] rstudioapi_0.11 beeswarm_0.2.3
## [75] png_0.1-7 reprex_0.3.0
## [77] stringi_1.5.3 RSpectra_0.16-0
## [79] lattice_0.20-41 Matrix_1.2-18
## [81] shinyjs_2.0.0 vctrs_0.3.4
## [83] pillar_1.4.6 lifecycle_0.2.0
## [85] BiocManager_1.30.10 GlobalOptions_0.1.2
## [87] BiocNeighbors_1.6.0 cowplot_1.1.0
## [89] data.table_1.13.2 bitops_1.0-6
## [91] irlba_2.3.3 httpuv_1.5.4
## [93] R6_2.4.1 promises_1.1.1
## [95] gridExtra_2.3 vipor_0.4.5
## [97] colourpicker_1.1.0 assertthat_0.2.1
## [99] rjson_0.2.20 shinyWidgets_0.5.4
## [101] withr_2.3.0 GenomeInfoDbData_1.2.3
## [103] mgcv_1.8-33 hms_0.5.3
## [105] grid_4.0.2 DelayedMatrixStats_1.10.1
## [107] Rtsne_0.15 lubridate_1.7.9
## [109] ggbeeswarm_0.6.0